From 1bf21e73cc71d8f3374912155cbae38178d60cea Mon Sep 17 00:00:00 2001 From: "kaf24@scramble.cl.cam.ac.uk" Date: Sun, 16 Nov 2003 18:11:18 +0000 Subject: [PATCH] bitkeeper revision 1.604 (3fb7bdc6G8jZv2m6lWAfV3eKAgnirQ) vbd.h, sched.h, xen_vbd.c, xen_block.c, domain.c: Cleanups and a few bug fixes to the new vbd code. --- xen/common/domain.c | 17 +- xen/drivers/block/xen_block.c | 50 +-- xen/drivers/block/xen_vbd.c | 628 +++++++++++++++++++++------------- xen/include/xeno/sched.h | 8 +- xen/include/xeno/vbd.h | 4 +- 5 files changed, 426 insertions(+), 281 deletions(-) diff --git a/xen/common/domain.c b/xen/common/domain.c index 33bc52c118..9edea30a8b 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -52,7 +52,6 @@ struct task_struct *do_createdomain(unsigned int dom_id, unsigned int cpu) spin_lock_init(&p->blk_ring_lock); spin_lock_init(&p->page_lock); - spin_lock_init(&p->physdev_lock); p->shared_info = (void *)get_free_page(GFP_KERNEL); memset(p->shared_info, 0, PAGE_SIZE); @@ -63,8 +62,6 @@ struct task_struct *do_createdomain(unsigned int dom_id, unsigned int cpu) init_blkdev_info(p); - INIT_LIST_HEAD(&p->physdisk_aces); - p->addr_limit = USER_DS; sched_add_domain(p); @@ -132,11 +129,6 @@ void __kill_domain(struct task_struct *p) unlink_blkdev_info(p); -#if 0 - for ( i = 0; i < XEN_MAX_VBDS; i++ ) - xen_vbd_delete(p, i); -#endif - for ( i = 0; i < MAX_DOMAIN_VIFS; i++ ) unlink_net_vif(p->net_vif_list[i]); @@ -302,16 +294,11 @@ void release_task(struct task_struct *p) printk("Releasing task %d\n", p->domain); /* - * This frees up blkdev rings. Totally safe since blkdev ref counting - * actually uses the task_struct refcnt. + * This frees up blkdev rings and vbd-access lists. Totally safe since + * blkdev ref counting actually uses the task_struct refcnt. */ destroy_blkdev_info(p); -#if 0 - /* Free up the physdisk access control info */ - destroy_physdisk_aces(p); -#endif - /* Free all memory associated with this domain. */ free_page((unsigned long)p->mm.perdomain_pt); UNSHARE_PFN(virt_to_page(p->shared_info)); diff --git a/xen/drivers/block/xen_block.c b/xen/drivers/block/xen_block.c index 01def5da58..bc4dad260b 100644 --- a/xen/drivers/block/xen_block.c +++ b/xen/drivers/block/xen_block.c @@ -166,6 +166,7 @@ static void maybe_trigger_io_schedule(void) /****************************************************************** * COMPLETION CALLBACK -- Called as bh->b_end_io() + * NB. This can be called from interrupt context! */ static void end_block_io_op(struct buffer_head *bh, int uptodate) @@ -208,11 +209,11 @@ long do_block_io_op(block_io_op_t *u_block_io_op) block_io_op_t op; struct task_struct *p = current; - if (copy_from_user(&op, u_block_io_op, sizeof(op))) + if ( copy_from_user(&op, u_block_io_op, sizeof(op)) ) return -EFAULT; - switch (op.cmd) { - + switch ( op.cmd ) + { case BLOCK_IO_OP_SIGNAL: /* simply indicates there're reqs outstanding => add current to list */ add_to_blkdev_list_tail(p); @@ -277,7 +278,6 @@ long do_block_io_op(block_io_op_t *u_block_io_op) ret = -ENOSYS; } - return ret; } @@ -439,7 +439,7 @@ static void dispatch_rw_block_io(struct task_struct *p, int index) int operation = (req->operation == XEN_BLOCK_WRITE) ? WRITE : READ; unsigned short nr_sects; unsigned long buffer, flags; - int i, rc, tot_sects; + int i, tot_sects; pending_req_t *pending_req; /* We map virtual scatter/gather segments to physical segments. */ @@ -484,23 +484,29 @@ static void dispatch_rw_block_io(struct task_struct *p, int index) phys_seg[nr_psegs].nr_sects = nr_sects; /* Translate the request into the relevant 'physical device' */ - new_segs = 1; - rc = vbd_translate(&phys_seg[nr_psegs], &new_segs, p, operation); - - /* If it fails we bail (unless the caller is priv => has raw access) */ - if(rc) { - if(!IS_PRIV(p)) { - printk("access denied: %s of [%ld,%ld] on dev=%04x\n", - operation == READ ? "read" : "write", - req->sector_number + tot_sects, - req->sector_number + tot_sects + nr_sects, - req->device); - goto bad_descriptor; - } - - /* SMH: skanky hack; clear any 'partition' info in device */ + new_segs = vbd_translate(&phys_seg[nr_psegs], p, operation); + + /* If it fails we bail (unless the caller is privileged). */ + if ( new_segs < 0 ) + { + if ( unlikely(new_segs != -ENODEV) || unlikely(!IS_PRIV(p)) ) + { + DPRINTK("access denied: %s of [%ld,%ld] on dev=%04x\n", + operation == READ ? "read" : "write", + req->sector_number + tot_sects, + req->sector_number + tot_sects + nr_sects, + req->device); + goto bad_descriptor; + } + + /* + * XXX Clear any 'partition' info in device. This works because IDE + * ignores the partition bits anyway. Only SCSI needs this hack, + * and it has four bits to clear. + */ phys_seg[nr_psegs].dev = req->device & 0xFFF0; - } + new_segs = 1; + } nr_psegs += new_segs; if ( nr_psegs >= (MAX_BLK_SEGS*2) ) BUG(); @@ -620,6 +626,7 @@ void init_blkdev_info(struct task_struct *p) clear_page(p->blk_ring_base); SHARE_PFN_WITH_DOMAIN(virt_to_page(p->blk_ring_base), p->domain); p->blkdev_list.next = NULL; + spin_lock_init(&p->vbd_lock); } /* End-of-day teardown for a domain. */ @@ -628,6 +635,7 @@ void destroy_blkdev_info(struct task_struct *p) ASSERT(!__on_blkdev_list(p)); UNSHARE_PFN(virt_to_page(p->blk_ring_base)); free_page((unsigned long)p->blk_ring_base); + destroy_all_vbds(p); } void unlink_blkdev_info(struct task_struct *p) diff --git a/xen/drivers/block/xen_vbd.c b/xen/drivers/block/xen_vbd.c index 6af9e9523f..b2a495769a 100644 --- a/xen/drivers/block/xen_vbd.c +++ b/xen/drivers/block/xen_vbd.c @@ -30,7 +30,6 @@ extern int scsi_probe_devices(xen_disk_info_t *xdi); #define DPRINTK(_f, _a...) ((void)0) #endif - /* XXX SMH: crappy 'hash function' .. fix when care. */ #define HSH(_x) (((_x) >> 6) & (VBD_HTAB_SZ - 1)) @@ -38,107 +37,231 @@ extern int scsi_probe_devices(xen_disk_info_t *xdi); ** Create a new VBD; all this involves is adding an entry to the domain's ** vbd hash table; caller must be privileged. */ -long vbd_create(vbd_create_t *create_params) +long vbd_create(vbd_create_t *create) { struct task_struct *p; - vbd_t *new_vbd, *v; - int h; + vbd_t *new_vbd, **pv; + long ret = 0; - if(!IS_PRIV(current)) - return -EPERM; + if( !IS_PRIV(current) ) + return -EPERM; - p = find_domain_by_id(create_params->domain); + if ( (p = find_domain_by_id(create->domain)) == NULL ) + { + DPRINTK("vbd_create attempted for non-existent domain %d\n", + create->domain); + return -EINVAL; + } - if (!p) { - printk("vbd_create attempted for non-existent domain %d\n", - create_params->domain); - return -EINVAL; + spin_lock(&p->vbd_lock); + + for ( pv = &p->vbdtab[HSH(create->vdevice)]; + *pv != NULL; + pv = &(*pv)->next ) + { + if ( (*pv)->vdevice == create->vdevice ) + { + DPRINTK("vbd_create attempted for already existing vbd\n"); + ret = -EINVAL; + goto out; + } + if ( (*pv)->vdevice > create->vdevice ) + break; } new_vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL); - new_vbd->vdevice = create_params->vdevice; - new_vbd->mode = create_params->mode; + new_vbd->vdevice = create->vdevice; + new_vbd->mode = create->mode; new_vbd->extents = (xen_extent_le_t *)NULL; new_vbd->next = (vbd_t *)NULL; - h = HSH(create_params->vdevice); - if(p->vbdtab[h]) { - for(v = p->vbdtab[h]; v->next; v = v->next) - ; - v->next = new_vbd; - } else p->vbdtab[h] = new_vbd; + *pv = new_vbd; + out: + spin_unlock(&p->vbd_lock); put_task_struct(p); - - return 0; + return ret; } /* ** Add an extent to an existing VBD; fails if the VBD doesn't exist. ** Doesn't worry about overlapping extents (e.g. merging etc) for now. */ -long vbd_add(vbd_add_t *add_params) +long vbd_add(vbd_add_t *add) { struct task_struct *p; - xen_extent_le_t *x, *xele; + xen_extent_le_t **px, *x; vbd_t *v; - int h; - - if(!IS_PRIV(current)) - return -EPERM; + long ret = 0; - p = find_domain_by_id(add_params->domain); + if ( !IS_PRIV(current) ) + return -EPERM; - if (!p) { - printk("vbd_add attempted for non-existent domain %d\n", - add_params->domain); - return -EINVAL; + if ( (p = find_domain_by_id(add->domain)) == NULL ) + { + DPRINTK("vbd_add attempted for non-existent domain %d\n", + add->domain); + return -EINVAL; } - h = HSH(add_params->vdevice); + spin_lock(&p->vbd_lock); - for(v = p->vbdtab[h]; v; v = v->next) - if(v->vdevice == add_params->vdevice) - break; + for ( v = p->vbdtab[HSH(add->vdevice)]; v != NULL; v = v->next ) + if ( v->vdevice == add->vdevice ) + break; - if(!v) { - printk("vbd_add; attempted to add extent to non-existent VBD.\n"); - return -EINVAL; + if ( v == NULL ) + { + DPRINTK("vbd_add; attempted to add extent to non-existent VBD.\n"); + ret = -EINVAL; + goto out; } - xele = kmalloc(sizeof(xen_extent_le_t), GFP_KERNEL); - xele->extent.device = add_params->extent.device; - xele->extent.start_sector = add_params->extent.start_sector; - xele->extent.nr_sectors = add_params->extent.nr_sectors; - xele->next = (xen_extent_le_t *)NULL; - - if(!v->extents) { - v->extents = xele; - } else { - for(x = v->extents; x->next; x = x->next) - ; - x->next = xele; - } + x = kmalloc(sizeof(xen_extent_le_t), GFP_KERNEL); + x->extent.device = add->extent.device; + x->extent.start_sector = add->extent.start_sector; + x->extent.nr_sectors = add->extent.nr_sectors; + x->next = (xen_extent_le_t *)NULL; - put_task_struct(p); + for ( px = &v->extents; *px != NULL; px = &(*px)->next ) + continue; + + *px = x; - return 0; + out: + spin_unlock(&p->vbd_lock); + put_task_struct(p); + return ret; } -long vbd_remove(vbd_remove_t *remove_params) +long vbd_remove(vbd_remove_t *remove) { - if(!IS_PRIV(current)) - return -EPERM; + struct task_struct *p; + xen_extent_le_t **px, *x; + vbd_t *v; + long ret = 0; + + if ( !IS_PRIV(current) ) + return -EPERM; + + if ( (p = find_domain_by_id(remove->domain)) == NULL ) + { + DPRINTK("vbd_remove attempted for non-existent domain %d\n", + remove->domain); + return -EINVAL; + } + + spin_lock(&p->vbd_lock); + + for ( v = p->vbdtab[HSH(remove->vdevice)]; v != NULL; v = v->next ) + if ( v->vdevice == remove->vdevice ) + break; + + if ( v == NULL ) + { + DPRINTK("vbd_remove; attempt to remove ext from non-existent VBD.\n"); + ret = -EINVAL; + goto out; + } + + for ( px = &v->extents; *px != NULL; px = &(*px)->next ) + if ( (*px)->extent.start_sector == remove->extent.start_sector ) + break; + + if ( ((x = *px) == NULL) || + (x->extent.nr_sectors != remove->extent.nr_sectors) || + (x->extent.device != remove->extent.device) ) + { + DPRINTK("vbd_remove: attempt to remove non-matching extent.\n"); + ret = -EINVAL; + goto out; + } - return -ENOSYS; + *px = x->next; + kfree(x); + + out: + spin_unlock(&p->vbd_lock); + put_task_struct(p); + return ret; } -long vbd_delete(vbd_delete_t *delete_params) +long vbd_delete(vbd_delete_t *delete) { - if(!IS_PRIV(current)) - return -EPERM; + struct task_struct *p; + vbd_t *v, **pv; + xen_extent_le_t *x, *t; + + if( !IS_PRIV(current) ) + return -EPERM; + + if ( (p = find_domain_by_id(delete->domain)) == NULL ) + { + DPRINTK("vbd_delete attempted for non-existent domain %d\n", + delete->domain); + return -EINVAL; + } + + spin_lock(&p->vbd_lock); + + for ( pv = &p->vbdtab[HSH(delete->vdevice)]; + *pv != NULL; + pv = &(*pv)->next ) + { + if ( (*pv)->vdevice == delete->vdevice ) + goto found; + } + + DPRINTK("vbd_delete attempted for non-existing VBD.\n"); - return -ENOSYS; + spin_unlock(&p->vbd_lock); + put_task_struct(p); + return -EINVAL; + + found: + v = *pv; + *pv = v->next; + x = v->extents; + kfree(v); + + while ( x != NULL ) + { + t = x->next; + kfree(x); + x = t; + } + + spin_unlock(&p->vbd_lock); + put_task_struct(p); + return 0; +} + + +void destroy_all_vbds(struct task_struct *p) +{ + int i; + vbd_t *v; + xen_extent_le_t *x, *t; + + spin_lock(&p->vbd_lock); + for ( i = 0; i < VBD_HTAB_SZ; i++ ) + { + while ( (v = p->vbdtab[i]) != NULL ) + { + p->vbdtab[i] = v->next; + + x = v->extents; + kfree(v); + + while ( x != NULL ) + { + t = x->next; + kfree(x); + x = t; + } + } + } + spin_unlock(&p->vbd_lock); } @@ -153,40 +276,45 @@ static int vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p) xen_extent_le_t *x; xen_disk_t cur_disk; vbd_t *v; - int i, ret; - - for(i = 0; i < VBD_HTAB_SZ; i++) { - - for(v = p->vbdtab[i]; v; v = v->next) { - - /* SMH: don't ever expect this to happen, hence verbose printk */ - if ( xdi->count == xdi->max ) { - printk("vbd_probe_devices: out of space for probe.\n"); - return -ENOMEM; - } - - cur_disk.device = v->vdevice; - cur_disk.info = XD_FLAG_VIRT | XD_TYPE_DISK; - if(!VBD_CAN_WRITE(v)) - cur_disk.info |= XD_FLAG_RO; - cur_disk.capacity = 0 ; - for(x = v->extents; x; x = x->next) - cur_disk.capacity += x->extent.nr_sectors; - cur_disk.domain = p->domain; - - /* Now copy into relevant part of user-space buffer */ - if((ret = copy_to_user(xdi->disks + xdi->count, &cur_disk, - sizeof(xen_disk_t))) < 0) { - printk("vbd_probe_devices: copy_to_user failed [rc=%d]\n", - ret); - return ret; - } - - - xdi->count++; - } + int i; + + spin_lock(&p->vbd_lock); + + for ( i = 0; i < VBD_HTAB_SZ; i++ ) + { + for ( v = p->vbdtab[i]; v != NULL; v = v->next ) + { + if ( xdi->count == xdi->max ) + { + DPRINTK("vbd_probe_devices: out of space for probe.\n"); + spin_unlock(&p->vbd_lock); + return -ENOMEM; + } + + cur_disk.device = v->vdevice; + cur_disk.info = XD_FLAG_VIRT | XD_TYPE_DISK; + if ( !VBD_CAN_WRITE(v) ) + cur_disk.info |= XD_FLAG_RO; + cur_disk.capacity = 0 ; + for ( x = v->extents; x != NULL; x = x->next ) + cur_disk.capacity += x->extent.nr_sectors; + cur_disk.domain = p->domain; + + /* Now copy into relevant part of user-space buffer */ + if( copy_to_user(&xdi->disks[xdi->count], + &cur_disk, + sizeof(xen_disk_t)) ) + { + DPRINTK("vbd_probe_devices: copy_to_user failed\n"); + spin_unlock(&p->vbd_lock); + return -EFAULT; + } + + xdi->count++; + } } + spin_unlock(&p->vbd_lock); return 0; } @@ -199,192 +327,218 @@ static int vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p) ** all domains ("VBD_PROBE_ALL") -- both of these cases require the ** caller to be privileged. */ -long vbd_probe(vbd_probe_t *probe_params) +long vbd_probe(vbd_probe_t *probe) { struct task_struct *p = NULL; - short putp = 0; - int ret = 0; - - if(probe_params->domain) { - - /* we can only probe for ourselves unless we're privileged */ - if(probe_params->domain != current->domain && !IS_PRIV(current)) - return -EPERM; - - if(probe_params->domain != VBD_PROBE_ALL) { - - p = find_domain_by_id(probe_params->domain); - - if (!p) { - printk("vbd_probe attempted for non-existent domain %d\n", - probe_params->domain); - return -EINVAL; - } - - putp = 1; - } - - } else - /* default is to probe for ourselves */ - p = current; - - - if(!p || IS_PRIV(p)) { + unsigned long flags; + long ret = 0; + + if ( probe->domain != 0 ) + { + /* We can only probe for ourselves unless we're privileged. */ + if( (probe->domain != current->domain) && !IS_PRIV(current) ) + return -EPERM; + + if ( (probe->domain != VBD_PROBE_ALL) && + ((p = find_domain_by_id(probe->domain)) == NULL) ) + { + DPRINTK("vbd_probe attempted for non-existent domain %d\n", + probe->domain); + return -EINVAL; + } + } + else + { + /* Default is to probe for ourselves. */ + p = current; + get_task_struct(p); /* to mirror final put_task_struct */ + } - /* privileged domains always get access to the 'real' devices */ - if((ret = ide_probe_devices(&probe_params->xdi))) { - printk("vbd_probe: error %d in probing ide devices\n", ret); - goto out; - } - if((ret = scsi_probe_devices(&probe_params->xdi))) { - printk("vbd_probe: error %d in probing scsi devices\n", ret); - goto out; - } + if ( (probe->domain == VBD_PROBE_ALL) || IS_PRIV(p) ) + { + /* Privileged domains always get access to the 'real' devices. */ + if ( (ret = ide_probe_devices(&probe->xdi)) != 0 ) + { + DPRINTK("vbd_probe: error %d in probing ide devices\n", ret); + goto out; + } + if ( (ret = scsi_probe_devices(&probe->xdi)) != 0 ) + { + DPRINTK("vbd_probe: error %d in probing scsi devices\n", ret); + goto out; + } } - - if(!p) { - - u_long flags; - - read_lock_irqsave (&tasklist_lock, flags); - - p = &idle0_task; - while ( (p = p->next_task) != &idle0_task ) { - if (!is_idle_task(p)) { - if((ret = vbd_probe_devices(&probe_params->xdi, p))) { - printk("vbd_probe: error %d in probing virtual devices\n", - ret); - read_unlock_irqrestore(&tasklist_lock, flags); - goto out; - } - } - } - - read_unlock_irqrestore(&tasklist_lock, flags); - - } else { - - /* probe for disks and VBDs for just 'p' */ - if((ret = vbd_probe_devices(&probe_params->xdi, p))) { - printk("vbd_probe: error %d in probing virtual devices\n", ret); - goto out; - } + if ( probe->domain == VBD_PROBE_ALL ) + { + read_lock_irqsave(&tasklist_lock, flags); + p = &idle0_task; + while ( (p = p->next_task) != &idle0_task ) + { + if ( !is_idle_task(p) ) + { + if( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 ) + { + DPRINTK("vbd_probe: error %d in probing virtual devices\n", + ret); + read_unlock_irqrestore(&tasklist_lock, flags); + goto out; + } + } + } + read_unlock_irqrestore(&tasklist_lock, flags); + } + else + { + if ( (ret = vbd_probe_devices(&probe->xdi, p)) ) + { + DPRINTK("vbd_probe: error %d in probing virtual devices\n", ret); + goto out; + } } out: - if(putp) - put_task_struct(p); - + if ( p != NULL ) + put_task_struct(p); return ret; } -long vbd_info(vbd_info_t *info_params) + +long vbd_info(vbd_info_t *info) { - struct task_struct *p = NULL; + struct task_struct *p; xen_extent_le_t *x; xen_extent_t *extents; vbd_t *v; - int h, ret = 0; + long ret = 0; - if(info_params->domain != current->domain && !IS_PRIV(current)) - return -EPERM; - - p = find_domain_by_id(info_params->domain); - - if (!p) { - printk("vbd_info attempted for non-existent domain %d\n", - info_params->domain); - return -EINVAL; + if ( (info->domain != current->domain) && !IS_PRIV(current) ) + return -EPERM; + + if ( (p = find_domain_by_id(info->domain)) == NULL ) + { + DPRINTK("vbd_info attempted for non-existent domain %d\n", + info->domain); + return -EINVAL; } - h = HSH(info_params->vdevice); + spin_lock(&p->vbd_lock); - for(v = p->vbdtab[h]; v; v = v->next) - if(v->vdevice == info_params->vdevice) - break; + for ( v = p->vbdtab[HSH(info->vdevice)]; v != NULL; v = v->next ) + if ( v->vdevice == info->vdevice ) + break; - if(!v) { - printk("vbd_info attempted on non-existent VBD.\n"); - ret = -EINVAL; - goto out; + if ( v == NULL ) + { + DPRINTK("vbd_info attempted on non-existent VBD.\n"); + ret = -EINVAL; + goto out; } - info_params->mode = v->mode; - info_params->nextents = 0; - - extents = info_params->extents; // convenience - - for(x = v->extents; x; x = x->next) { - if((ret = copy_to_user(extents++, &x->extent, - sizeof(xen_extent_t))) < 0) { - printk("vbd_info: copy_to_user failed [rc=%d]\n", ret); - goto out; - } - info_params->nextents++; + info->mode = v->mode; + info->nextents = 0; + + extents = info->extents; + for ( x = v->extents; x != NULL; x = x->next ) + { + if ( copy_to_user(extents, &x->extent, sizeof(xen_extent_t)) ) + { + DPRINTK("vbd_info: copy_to_user failed\n"); + ret = -EFAULT; + goto out; + } + extents++; + info->nextents++; } out: + spin_unlock(&p->vbd_lock); put_task_struct(p); return ret; } -int vbd_translate(phys_seg_t * pseg, int *nr_segs, - struct task_struct *p, int operation) +int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation) { xen_extent_le_t *x; vbd_t *v; - int h; - long sec; - - h = HSH(pseg->dev); - - for(v = p->vbdtab[h]; v; v = v->next) - if(v->vdevice == pseg->dev) - break; - - if(!v) { - if(!IS_PRIV(p)) - printk("vbd_translate; domain %d attempted to access " - "non-existent VBD.\n", p->domain); - return -ENODEV; - } - - if(operation == READ && !VBD_CAN_READ(v)) - return -EACCES; - - if(operation == WRITE && !VBD_CAN_WRITE(v)) - return -EACCES; - + unsigned long sec_off, nr_secs; - /* Now iterate through the list of xen_extents, working out which - should be used to perform the translation. */ - sec = pseg->sector_number; - for(x = v->extents; x; x = x->next) { + spin_lock(&p->vbd_lock); - if(sec < x->extent.nr_sectors) { + for ( v = p->vbdtab[HSH(pseg->dev)]; v != NULL; v = v->next ) + if ( v->vdevice == pseg->dev ) + goto found; - /* we've got a match! XXX SMH: should deal with - situation where we span multiple xe's */ + if ( unlikely(!IS_PRIV(p)) ) + DPRINTK("vbd_translate; domain %d attempted to access " + "non-existent VBD.\n", p->domain); - pseg->dev = x->extent.device; - pseg->sector_number += x->extent.start_sector; + spin_unlock(&p->vbd_lock); + return -ENODEV; - return 0; + found: - } + if ( ((operation == READ) && !VBD_CAN_READ(v)) || + ((operation == WRITE) && !VBD_CAN_WRITE(v)) ) + { + spin_unlock(&p->vbd_lock); + return -EACCES; + } - sec -= x->extent.nr_sectors; + /* + * Now iterate through the list of xen_extents, working out which should + * be used to perform the translation. + */ + sec_off = pseg->sector_number; + nr_secs = pseg->nr_sects; + for ( x = v->extents; x != NULL; x = x->next ) + { + if ( sec_off < x->extent.nr_sectors ) + { + pseg->dev = x->extent.device; + pseg->sector_number = x->extent.start_sector + sec_off; + if ( unlikely((sec_off + nr_secs) > x->extent.nr_sectors) ) + goto overrun; + spin_unlock(&p->vbd_lock); + return 1; + } + sec_off -= x->extent.nr_sectors; } - /* No luck -- return no access */ + DPRINTK("vbd_translate: end of vbd.\n"); + spin_unlock(&p->vbd_lock); return -EACCES; -} - - - - + /* + * Here we deal with overrun onto the following extent. We don't deal with + * overrun of more than one boundary since each request is restricted to + * 2^9 512-byte sectors, so it should be trivial for control software to + * ensure that extents are large enough to prevent excessive overrun. + */ + overrun: + + /* Adjust length of first chunk to run to end of first extent. */ + pseg[0].nr_sects = x->extent.nr_sectors - sec_off; + + /* Set second chunk buffer and length to start where first chunk ended. */ + pseg[1].buffer = pseg[0].buffer + (pseg[0].nr_sects << 9); + pseg[1].nr_sects = nr_secs - pseg[0].nr_sects; + + /* Now move to the next extent. Check it exists and is long enough! */ + if ( unlikely((x = x->next) == NULL) || + unlikely(x->extent.nr_sectors < pseg[1].nr_sects) ) + { + DPRINTK("vbd_translate: multiple overruns or end of vbd.\n"); + spin_unlock(&p->vbd_lock); + return -EACCES; + } + /* Store the real device and start sector for the second chunk. */ + pseg[1].dev = x->extent.device; + pseg[1].sector_number = x->extent.start_sector; + + spin_unlock(&p->vbd_lock); + return 2; +} diff --git a/xen/include/xeno/sched.h b/xen/include/xeno/sched.h index a17b7864a3..812336e725 100644 --- a/xen/include/xeno/sched.h +++ b/xen/include/xeno/sched.h @@ -114,12 +114,8 @@ struct task_struct unsigned int blk_resp_prod; /* (private version of) response producer */ struct list_head blkdev_list; spinlock_t blk_ring_lock; - struct list_head physdisk_aces; /* physdisk_ace structures - describing what bits of disk - the process can do raw access - to. */ - spinlock_t physdev_lock; - vbd_t *vbdtab[VBD_HTAB_SZ]; // mapping from 16bit vdevices to vbds + vbd_t *vbdtab[VBD_HTAB_SZ]; /* mapping from 16-bit vdevices to vbds */ + spinlock_t vbd_lock; /* VM */ struct mm_struct mm; diff --git a/xen/include/xeno/vbd.h b/xen/include/xeno/vbd.h index 9660756db8..f5fc383fcd 100644 --- a/xen/include/xeno/vbd.h +++ b/xen/include/xeno/vbd.h @@ -37,6 +37,7 @@ long vbd_delete(vbd_delete_t *delete_params); long vbd_probe(vbd_probe_t *probe_params); long vbd_info(vbd_info_t *info_params); +void destroy_all_vbds(struct task_struct *p); /* Describes a [partial] disk extent (part of a block io request) */ typedef struct { @@ -47,8 +48,7 @@ typedef struct { } phys_seg_t; -int vbd_translate(phys_seg_t * pseg, int *nr_segs, - struct task_struct *p, int operation); +int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation); #endif /* __VBD_H__ */ -- 2.30.2